From daf733cf74e16ad6a9d1fce61db250a346ea0209 Mon Sep 17 00:00:00 2001 From: Scott Egerton Date: Thu, 5 Sep 2024 12:33:35 +0100 Subject: [PATCH] [AMDGPU] Remove unused VGPRSingleUseHintInsts feature --- llvm/docs/AMDGPUUsage.rst | 4 +- llvm/lib/Target/AMDGPU/AMDGPU.h | 3 - llvm/lib/Target/AMDGPU/AMDGPU.td | 13 - .../AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 245 --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 10 - llvm/lib/Target/AMDGPU/CMakeLists.txt | 1 - llvm/lib/Target/AMDGPU/GCNSubtarget.h | 3 - llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 - llvm/lib/Target/AMDGPU/SOPInstructions.td | 11 - .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 18 - llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 2 + llvm/lib/Target/AMDGPU/VOP1Instructions.td | 18 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 6 +- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 35 +- llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 12 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 13 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 20 +- .../CodeGen/AMDGPU/insert-singleuse-vdst.mir | 1420 ----------------- llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s | 10 - llvm/test/MC/AMDGPU/gfx11_unsupported.s | 3 - llvm/test/MC/AMDGPU/gfx12_asm_sopp.s | 9 - .../MC/Disassembler/AMDGPU/decode-err.txt | 5 - .../Disassembler/AMDGPU/gfx1150_dasm_sopp.txt | 10 - .../Disassembler/AMDGPU/gfx12_dasm_sopp.txt | 8 - .../secondary/llvm/lib/Target/AMDGPU/BUILD.gn | 1 - 25 files changed, 34 insertions(+), 1848 deletions(-) delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp delete mode 100644 llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir delete mode 100644 llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s delete mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 4b48b54b18bb99..9e11b13c101d47 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -611,9 +611,7 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor - ``gfx1152`` SALU floating point instructions - and single-use VGPR hint - instructions are not available - on: + are not available on: - ``gfx1150`` - ``gfx1151`` diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index b2dd354e496a2e..4abb5a63ab6d2c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -405,9 +405,6 @@ extern char &SIModeRegisterID; void initializeAMDGPUInsertDelayAluPass(PassRegistry &); extern char &AMDGPUInsertDelayAluID; -void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &); -extern char &AMDGPUInsertSingleUseVDSTID; - void initializeSIInsertHardClausesPass(PassRegistry &); extern char &SIInsertHardClausesID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 5757ac0d4454d0..66474dfcbb5cc0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -929,12 +929,6 @@ def FeatureSALUFloatInsts : SubtargetFeature<"salu-float", "Has SALU floating point instructions" >; -def FeatureVGPRSingleUseHintInsts : SubtargetFeature<"vgpr-singleuse-hint", - "HasVGPRSingleUseHintInsts", - "true", - "Has single-use VGPR hint instructions" ->; - def FeaturePseudoScalarTrans : SubtargetFeature<"pseudo-scalar-trans", "HasPseudoScalarTrans", "true", @@ -1615,14 +1609,12 @@ def FeatureISAVersion11_5_0 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureSALUFloatInsts, FeatureDPPSrc1SGPR, - FeatureVGPRSingleUseHintInsts, FeatureRequiredExportPriority])>; def FeatureISAVersion11_5_1 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureSALUFloatInsts, FeatureDPPSrc1SGPR, - FeatureVGPRSingleUseHintInsts, Feature1_5xVGPRs, FeatureRequiredExportPriority])>; @@ -1630,7 +1622,6 @@ def FeatureISAVersion11_5_2 : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureSALUFloatInsts, FeatureDPPSrc1SGPR, - FeatureVGPRSingleUseHintInsts, FeatureRequiredExportPriority])>; def FeatureISAVersion12 : FeatureSet< @@ -1663,7 +1654,6 @@ def FeatureISAVersion12 : FeatureSet< FeatureSALUFloatInsts, FeaturePseudoScalarTrans, FeatureHasRestrictedSOffset, - FeatureVGPRSingleUseHintInsts, FeatureScalarDwordx3Loads, FeatureDPPSrc1SGPR, FeatureMaxHardClauseLength32, @@ -2267,9 +2257,6 @@ def HasNotMADIntraFwdBug : Predicate<"!Subtarget->hasMADIntraFwdBug()">; def HasSALUFloatInsts : Predicate<"Subtarget->hasSALUFloatInsts()">, AssemblerPredicate<(all_of FeatureSALUFloatInsts)>; -def HasVGPRSingleUseHintInsts : Predicate<"Subtarget->hasVGPRSingleUseHintInsts()">, - AssemblerPredicate<(all_of FeatureVGPRSingleUseHintInsts)>; - def HasPseudoScalarTrans : Predicate<"Subtarget->hasPseudoScalarTrans()">, AssemblerPredicate<(all_of FeaturePseudoScalarTrans)>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp deleted file mode 100644 index 43b3bf43fe56db..00000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp +++ /dev/null @@ -1,245 +0,0 @@ -//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -/// \file -/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU -/// instructions that produce single-use VGPR values. If the value is forwarded -/// to the consumer instruction prior to VGPR writeback, the hardware can -/// then skip (kill) the VGPR write. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" -#include "AMDGPUGenSearchableTables.inc" -#include "GCNSubtarget.h" -#include "SIInstrInfo.h" -#include "SIRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/Register.h" -#include "llvm/IR/DebugLoc.h" -#include "llvm/MC/MCRegister.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Pass.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "amdgpu-insert-single-use-vdst" - -namespace { -class AMDGPUInsertSingleUseVDST : public MachineFunctionPass { -private: - const SIInstrInfo *SII; - class SingleUseInstruction { - private: - static const unsigned MaxSkipRange = 0b111; - static const unsigned MaxNumberOfSkipRegions = 2; - - unsigned LastEncodedPositionEnd; - MachineInstr *ProducerInstr; - - std::array SingleUseRegions; - SmallVector SkipRegions; - - // Adds a skip region into the instruction. - void skip(const unsigned ProducerPosition) { - while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) { - SkipRegions.push_back(MaxSkipRange); - LastEncodedPositionEnd += MaxSkipRange; - } - SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd); - LastEncodedPositionEnd = ProducerPosition; - } - - bool currentRegionHasSpace() { - const auto Region = SkipRegions.size(); - // The first region has an extra bit of encoding space. - return SingleUseRegions[Region] < - ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U); - } - - unsigned encodeImm() { - // Handle the first Single Use Region separately as it has an extra bit - // of encoding space. - unsigned Imm = SingleUseRegions[SkipRegions.size()]; - unsigned ShiftAmount = 4; - for (unsigned i = SkipRegions.size(); i > 0; i--) { - Imm |= SkipRegions[i - 1] << ShiftAmount; - ShiftAmount += 3; - Imm |= SingleUseRegions[i - 1] << ShiftAmount; - ShiftAmount += 3; - } - return Imm; - } - - public: - SingleUseInstruction(const unsigned ProducerPosition, - MachineInstr *Producer) - : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer), - SingleUseRegions({1, 0, 0}) {} - - // Returns false if adding a new single use producer failed. This happens - // because it could not be encoded, either because there is no room to - // encode another single use producer region or that this single use - // producer is too far away to encode the amount of instructions to skip. - bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) { - // Producer is too far away to encode into this instruction or another - // skip region is needed and SkipRegions.size() = 2 so there's no room for - // another skip region, therefore a new instruction is needed. - if (LastEncodedPositionEnd + - (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) < - ProducerPosition) - return false; - - // If a skip region is needed. - if (LastEncodedPositionEnd != ProducerPosition || - !currentRegionHasSpace()) { - // If the current region is out of space therefore a skip region would - // be needed, but there is no room for another skip region. - if (SkipRegions.size() == MaxNumberOfSkipRegions) - return false; - skip(ProducerPosition); - } - - SingleUseRegions[SkipRegions.size()]++; - LastEncodedPositionEnd = ProducerPosition + 1; - ProducerInstr = MI; - return true; - } - - auto emit(const SIInstrInfo *SII) { - return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(), - SII->get(AMDGPU::S_SINGLEUSE_VDST)) - .addImm(encodeImm()); - } - }; - -public: - static char ID; - - AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {} - - void insertSingleUseInstructions( - ArrayRef> SingleUseProducers) const { - SmallVector Instructions; - - for (auto &[Position, MI] : SingleUseProducers) { - // Encode this position into the last single use instruction if possible. - if (Instructions.empty() || - !Instructions.back().tryAddProducer(Position, MI)) { - // If not, add a new instruction. - Instructions.push_back(SingleUseInstruction(Position, MI)); - } - } - - for (auto &Instruction : Instructions) - Instruction.emit(SII); - } - - bool runOnMachineFunction(MachineFunction &MF) override { - const auto &ST = MF.getSubtarget(); - if (!ST.hasVGPRSingleUseHintInsts()) - return false; - - SII = ST.getInstrInfo(); - const auto *TRI = &SII->getRegisterInfo(); - bool InstructionEmitted = false; - - for (MachineBasicBlock &MBB : MF) { - DenseMap RegisterUseCount; - - // Handle boundaries at the end of basic block separately to avoid - // false positives. If they are live at the end of a basic block then - // assume it has more uses later on. - for (const auto &Liveout : MBB.liveouts()) { - for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid(); - ++Units) { - const auto [Unit, Mask] = *Units; - if ((Mask & Liveout.LaneMask).any()) - RegisterUseCount[Unit] = 2; - } - } - - SmallVector> - SingleUseProducerPositions; - - unsigned VALUInstrCount = 0; - for (MachineInstr &MI : reverse(MBB.instrs())) { - // All registers in all operands need to be single use for an - // instruction to be marked as a single use producer. - bool AllProducerOperandsAreSingleUse = true; - - // Gather a list of Registers used before updating use counts to avoid - // double counting registers that appear multiple times in a single - // MachineInstr. - SmallVector RegistersUsed; - - for (const auto &Operand : MI.all_defs()) { - const auto Reg = Operand.getReg(); - - const auto RegUnits = TRI->regunits(Reg); - if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) { - return RegisterUseCount[Unit] > 1; - })) - AllProducerOperandsAreSingleUse = false; - - // Reset uses count when a register is no longer live. - for (const MCRegUnit Unit : RegUnits) - RegisterUseCount.erase(Unit); - } - - for (const auto &Operand : MI.all_uses()) { - const auto Reg = Operand.getReg(); - - // Count the number of times each register is read. - for (const MCRegUnit Unit : TRI->regunits(Reg)) { - if (!is_contained(RegistersUsed, Unit)) - RegistersUsed.push_back(Unit); - } - } - for (const MCRegUnit Unit : RegistersUsed) - RegisterUseCount[Unit]++; - - // Do not attempt to optimise across exec mask changes. - if (MI.modifiesRegister(AMDGPU::EXEC, TRI) || - AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) { - for (auto &UsedReg : RegisterUseCount) - UsedReg.second = 2; - } - - if (!SIInstrInfo::isVALU(MI) || - AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode())) - continue; - if (AllProducerOperandsAreSingleUse) { - SingleUseProducerPositions.push_back({VALUInstrCount, &MI}); - InstructionEmitted = true; - } - VALUInstrCount++; - } - insertSingleUseInstructions(SingleUseProducerPositions); - } - return InstructionEmitted; - } -}; -} // namespace - -char AMDGPUInsertSingleUseVDST::ID = 0; - -char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID; - -INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE, - "AMDGPU Insert SingleUseVDST", false, false) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 04fdee0819b502..abd50748f2cc05 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -311,12 +311,6 @@ static cl::opt EnableSIModeRegisterPass( cl::init(true), cl::Hidden); -// Enable GFX11.5+ s_singleuse_vdst insertion -static cl::opt - EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst", - cl::desc("Enable s_singleuse_vdst insertion"), - cl::init(false), cl::Hidden); - // Enable GFX11+ s_delay_alu insertion static cl::opt EnableInsertDelayAlu("amdgpu-enable-delay-alu", @@ -450,7 +444,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeAMDGPURewriteUndefForPHILegacyPass(*PR); initializeAMDGPUUnifyMetadataPass(*PR); initializeSIAnnotateControlFlowLegacyPass(*PR); - initializeAMDGPUInsertSingleUseVDSTPass(*PR); initializeAMDGPUInsertDelayAluPass(*PR); initializeSIInsertHardClausesPass(*PR); initializeSIInsertWaitcntsPass(*PR); @@ -1518,9 +1511,6 @@ void GCNPassConfig::addPreEmitPass() { // cases. addPass(&PostRAHazardRecognizerID); - if (isPassEnabled(EnableInsertSingleUseVDST, CodeGenOptLevel::Less)) - addPass(&AMDGPUInsertSingleUseVDSTID); - if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) addPass(&AMDGPUInsertDelayAluID); diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index e813653158e5d9..7c883cc2017ddd 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -81,7 +81,6 @@ add_llvm_target(AMDGPUCodeGen AMDGPUMCInstLower.cpp AMDGPUMemoryUtils.cpp AMDGPUIGroupLP.cpp - AMDGPUInsertSingleUseVDST.cpp AMDGPUMarkLastScratchLoad.cpp AMDGPUMIRFormatter.cpp AMDGPUOpenCLEnqueuedBlockLowering.cpp diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index a4ae8a1be32258..e6b7342d5fffcf 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -215,7 +215,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasPackedTID = false; bool ScalarizeGlobal = false; bool HasSALUFloatInsts = false; - bool HasVGPRSingleUseHintInsts = false; bool HasPseudoScalarTrans = false; bool HasRestrictedSOffset = false; @@ -1280,8 +1279,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool hasSALUFloatInsts() const { return HasSALUFloatInsts; } - bool hasVGPRSingleUseHintInsts() const { return HasVGPRSingleUseHintInsts; } - bool hasPseudoScalarTrans() const { return HasPseudoScalarTrans; } bool hasRestrictedSOffset() const { return HasRestrictedSOffset; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index c016be2fc6c0fb..087ca1f954464d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2409,8 +2409,6 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field bit EnableClamp = _EnableClamp; field bit IsTrue16 = 0; field bit IsRealTrue16 = 0; - field bit IsInvalidSingleUseConsumer = 0; - field bit IsInvalidSingleUseProducer = 0; field ValueType DstVT = ArgVT[0]; field ValueType Src0VT = ArgVT[1]; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 2e73a1a15f6b32..9da27a7c7ee7d6 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1752,11 +1752,6 @@ let OtherPredicates = [HasExportInsts] in "$simm16">; } // End SubtargetPredicate = isGFX11Plus -let SubtargetPredicate = HasVGPRSingleUseHintInsts in { - def S_SINGLEUSE_VDST : - SOPP_Pseudo<"s_singleuse_vdst", (ins s16imm:$simm16), "$simm16">; -} // End SubtargetPredicate = HasVGPRSingeUseHintInsts - let SubtargetPredicate = isGFX12Plus, hasSideEffects = 1 in { def S_WAIT_LOADCNT : SOPP_Pseudo<"s_wait_loadcnt", (ins s16imm:$simm16), "$simm16", @@ -2676,12 +2671,6 @@ defm S_ICACHE_INV : SOPP_Real_32_gfx11_gfx12<0x03c>; defm S_BARRIER : SOPP_Real_32_gfx11<0x03d>; -//===----------------------------------------------------------------------===// -// SOPP - GFX1150, GFX12. -//===----------------------------------------------------------------------===// - -defm S_SINGLEUSE_VDST : SOPP_Real_32_gfx11_gfx12<0x013>; - //===----------------------------------------------------------------------===// // SOPP - GFX6, GFX7, GFX8, GFX9, GFX10 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 8b5ec8793d84a2..f32c82f1e4ba4c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -379,12 +379,6 @@ struct VOPTrue16Info { bool IsTrue16; }; -struct SingleUseExceptionInfo { - uint16_t Opcode; - bool IsInvalidSingleUseConsumer; - bool IsInvalidSingleUseProducer; -}; - struct FP8DstByteSelInfo { uint16_t Opcode; bool HasFP8DstByteSel; @@ -396,8 +390,6 @@ struct FP8DstByteSelInfo { #define GET_MTBUFInfoTable_IMPL #define GET_MUBUFInfoTable_DECL #define GET_MUBUFInfoTable_IMPL -#define GET_SingleUseExceptionTable_DECL -#define GET_SingleUseExceptionTable_IMPL #define GET_SMInfoTable_DECL #define GET_SMInfoTable_IMPL #define GET_VOP1InfoTable_DECL @@ -626,16 +618,6 @@ bool isTrue16Inst(unsigned Opc) { return Info ? Info->IsTrue16 : false; } -bool isInvalidSingleUseConsumerInst(unsigned Opc) { - const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc); - return Info && Info->IsInvalidSingleUseConsumer; -} - -bool isInvalidSingleUseProducerInst(unsigned Opc) { - const SingleUseExceptionInfo *Info = getSingleUseExceptionHelper(Opc); - return Info && Info->IsInvalidSingleUseProducer; -} - bool isFP8DstSelInst(unsigned Opc) { const FP8DstByteSelInfo *Info = getFP8DstByteSelHelper(Opc); return Info ? Info->HasFP8DstByteSel : false; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 35c080d8e0bebc..da37534f2fa4ff 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -870,6 +870,8 @@ bool isInvalidSingleUseConsumerInst(unsigned Opc); LLVM_READONLY bool isInvalidSingleUseProducerInst(unsigned Opc); +bool isDPMACCInstruction(unsigned Opc); + LLVM_READONLY unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc); diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 33f2f9f1f5c5b9..bd805059705783 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -252,7 +252,6 @@ def VOP_READFIRSTLANE : VOPProfile <[i32, i32, untyped, untyped]> { def V_READFIRSTLANE_B32 : VOP1_Pseudo <"v_readfirstlane_b32", VOP_READFIRSTLANE, [], 1> { let isConvergent = 1; - let IsInvalidSingleUseConsumer = 1; } foreach vt = Reg32Types.types in { @@ -375,7 +374,6 @@ defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT>; def VOP_MOVRELS : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC32 = VRegSrc_32; let Src0RC64 = VRegSrc_32; - let IsInvalidSingleUseConsumer = 1; } // Special case because there are no true output operands. Hack vdst @@ -419,12 +417,8 @@ class VOP_MOVREL : VOPProfile<[untyped, i32, untyped, un let EmitDst = 1; // force vdst emission } -let IsInvalidSingleUseProducer = 1 in { - def VOP_MOVRELD : VOP_MOVREL; - def VOP_MOVRELSD : VOP_MOVREL { - let IsInvalidSingleUseConsumer = 1; - } -} +def VOP_MOVRELD : VOP_MOVREL; +def VOP_MOVRELSD : VOP_MOVREL; let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in { // v_movreld_b32 is a special case because the destination output @@ -541,7 +535,6 @@ let SubtargetPredicate = isGFX9Plus in { let Constraints = "$vdst = $src1, $vdst1 = $src0"; let DisableEncoding = "$vdst1,$src1"; let SchedRW = [Write64Bit, Write64Bit]; - let IsInvalidSingleUseConsumer = 1; } let isReMaterializable = 1 in @@ -708,8 +701,6 @@ let SubtargetPredicate = isGFX10Plus in { let Constraints = "$vdst = $src1, $vdst1 = $src0"; let DisableEncoding = "$vdst1,$src1"; let SchedRW = [Write64Bit, Write64Bit]; - let IsInvalidSingleUseConsumer = 1; - let IsInvalidSingleUseProducer = 1; } } // End Uses = [M0] } // End SubtargetPredicate = isGFX10Plus @@ -743,10 +734,7 @@ let SubtargetPredicate = isGFX11Plus in { } // Restrict src0 to be VGPR def V_PERMLANE64_B32 : VOP1_Pseudo<"v_permlane64_b32", VOP_MOVRELS, - [], /*VOP1Only=*/ 1> { - let IsInvalidSingleUseConsumer = 1; - let IsInvalidSingleUseProducer = 1; - } + [], /*VOP1Only=*/ 1>; defm V_MOV_B16 : VOP1Inst_t16<"v_mov_b16", VOP_I16_I16>; defm V_NOT_B16 : VOP1Inst_t16<"v_not_b16", VOP_I16_I16>; defm V_CVT_I32_I16 : VOP1Inst_t16<"v_cvt_i32_i16", VOP_I32_I16>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index dd48607402eb0b..52f7be3b4577df 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -788,12 +788,10 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag, } // End isCommutable = 1 // These are special and do not read the exec mask. -let isConvergent = 1, Uses = [], IsInvalidSingleUseConsumer = 1 in { +let isConvergent = 1, Uses = [] in { def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE, []>; let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { -def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []> { - let IsInvalidSingleUseProducer = 1; - } +def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, []>; } // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in } // End isConvergent = 1 diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 466114b95f9f90..20beb41b7b58bb 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -157,12 +157,12 @@ defm V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile, fmaxnum_l } // End SubtargetPredicate = isNotGFX12Plus } // End SchedRW = [WriteDoubleAdd] -let SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 in { +let SchedRW = [WriteIntMul] in { defm V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", V_MUL_PROF, DivergentBinFrag>; defm V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", V_MUL_PROF, mulhu>; defm V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", V_MUL_PROF>; defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF, mulhs>; -} // End SchedRW = [WriteIntMul], IsInvalidSingleUseConsumer = 1 +} // End SchedRW = [WriteIntMul] let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in { defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile, DivergentBinFrag>; @@ -260,9 +260,9 @@ let mayRaiseFPException = 0 in { // Seems suspicious but manual doesn't say it d let isReMaterializable = 1 in defm V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile>; -let Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 in { +let Constraints = "@earlyclobber $vdst" in { defm V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile>; -} // End Constraints = "@earlyclobber $vdst", IsInvalidSingleUseConsumer = 1 +} // End Constraints = "@earlyclobber $vdst" let isReMaterializable = 1 in { @@ -277,16 +277,14 @@ let SchedRW = [Write64Bit] in { defm V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile, csra_64>; } // End SubtargetPredicate = isGFX6GFX7 - let IsInvalidSingleUseConsumer = 1 in { let SubtargetPredicate = isGFX8Plus in { defm V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile, clshr_rev_64>; defm V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile, cashr_rev_64>; - } // End SubtargetPredicate = isGFX8Plus, , IsInvalidSingleUseConsumer = 1 + } // End SubtargetPredicate = isGFX8Plus let SubtargetPredicate = isGFX8GFX9GFX10GFX11 in { defm V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile, clshl_rev_64>; } // End SubtargetPredicate = isGFX8GFX9GFX10GFX11 - } // End IsInvalidSingleUseConsumer = 1 } // End SchedRW = [Write64Bit] } // End isReMaterializable = 1 @@ -311,14 +309,14 @@ def VOPProfileMQSAD : VOP3_Profile { let HasModifiers = 0; } -let SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 in { +let SubtargetPredicate = isGFX7Plus in { let Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] in { defm V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile>; defm V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOPProfileMQSAD>; } // End Constraints = "@earlyclobber $vdst", SchedRW = [WriteQuarterRate32] -} // End SubtargetPredicate = isGFX7Plus, IsInvalidSingleUseConsumer = 1 +} // End SubtargetPredicate = isGFX7Plus -let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 in { +let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { let SubtargetPredicate = isGFX7Plus, OtherPredicates = [HasNotMADIntraFwdBug] in { defm V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; defm V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; @@ -328,7 +326,7 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseCons defm V_MAD_U64_U32_gfx11 : VOP3Inst <"v_mad_u64_u32", VOP3b_I64_I1_I32_I32_I64>; defm V_MAD_I64_I32_gfx11 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } -} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU], IsInvalidSingleUseConsumer = 1 +} // End isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] let FPDPRounding = 1 in { @@ -865,10 +863,10 @@ let SubtargetPredicate = isGFX10Plus in { } // End isCommutable = 1, isReMaterializable = 1 def : ThreeOp_i32_Pats; - let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in", IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 in { + let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { defm V_PERMLANE16_B32 : VOP3Inst<"v_permlane16_b32", VOP3_PERMLANE_Profile>; defm V_PERMLANEX16_B32 : VOP3Inst<"v_permlanex16_b32", VOP3_PERMLANE_Profile>; - } // End $vdst = $vdst_in, DisableEncoding $vdst_in, IsInvalidSingleUseConsumer = 1, IsInvalidSingleUseProducer = 1 + } // End $vdst = $vdst_in, DisableEncoding $vdst_in foreach vt = Reg32Types.types in { def : PermlanePat; @@ -1286,12 +1284,11 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in { } } // End AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" -let IsInvalidSingleUseConsumer = 1 in { - defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; - let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 in { - defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>; - } // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32: $src1, VGPR_32:$vdst_in), IsInvalidSingleUseProducer = 1 -} // End IsInvalidSingleUseConsumer = 1 +defm V_READLANE_B32 : VOP3_Real_No_Suffix_gfx10<0x360>; + +let InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) in { + defm V_WRITELANE_B32 : VOP3_Real_No_Suffix_gfx10<0x361>; +} // End InOperandList = (ins SSrcOrLds_b32:$src0, SCSrc_b32:$src1, VGPR_32:$vdst_in) let SubtargetPredicate = isGFX10Before1030 in { defm V_MUL_LO_I32 : VOP3_Real_gfx10<0x16b>; diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td index f4d2c29158f49f..5eee71887964ad 100644 --- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td @@ -382,19 +382,15 @@ defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16", AMDGPUfdot2, 1/*ExplicitClamp*/>; let OtherPredicates = [HasDot7Insts] in { -let IsInvalidSingleUseConsumer = 1 in { - defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", - VOP3P_Profile, int_amdgcn_udot4, 1>; -} +defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8", + VOP3P_Profile, int_amdgcn_udot4, 1>; defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4", VOP3P_Profile, int_amdgcn_udot8, 1>; } // End OtherPredicates = [HasDot7Insts] let OtherPredicates = [HasDot1Insts] in { -let IsInvalidSingleUseConsumer = 1 in { - defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", - VOP3P_Profile, int_amdgcn_sdot4, 1>; -} +defm V_DOT4_I32_I8 : VOP3PInst<"v_dot4_i32_i8", + VOP3P_Profile, int_amdgcn_sdot4, 1>; defm V_DOT8_I32_I4 : VOP3PInst<"v_dot8_i32_i4", VOP3P_Profile, int_amdgcn_sdot8, 1>; } // End OtherPredicates = [HasDot1Insts] diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index be862b44917e15..d6e08dce130ced 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -464,10 +464,9 @@ multiclass VOPC_I16 : VOPC_Pseudos ; -let IsInvalidSingleUseConsumer = 1 in { - multiclass VOPC_I64 : - VOPC_Pseudos ; -} +multiclass VOPC_I64 : + VOPC_Pseudos ; + multiclass VOPCX_F16 { let OtherPredicates = [Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in { @@ -502,10 +501,8 @@ multiclass VOPCX_I16 { multiclass VOPCX_I32 : VOPCX_Pseudos ; -let IsInvalidSingleUseConsumer = 1 in { - multiclass VOPCX_I64 : - VOPCX_Pseudos ; -} +multiclass VOPCX_I64 : + VOPCX_Pseudos ; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 5a460ef0d42320..05a7d907d237ae 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -17,8 +17,6 @@ class LetDummies { bit isReMaterializable; bit isAsCheapAsAMove; bit FPDPRounding; - bit IsInvalidSingleUseConsumer; - bit IsInvalidSingleUseProducer; Predicate SubtargetPredicate; string Constraints; string DisableEncoding; @@ -67,8 +65,6 @@ class VOP_Pseudo (NAME); bit IsTrue16 = P.IsTrue16; - bit IsInvalidSingleUseConsumer = P.IsInvalidSingleUseConsumer; - bit IsInvalidSingleUseProducer = P.IsInvalidSingleUseProducer; VOPProfile Pfl = P; string AsmOperands; @@ -165,8 +161,6 @@ class VOP3P_Pseudo pattern = []> : class VOP_Real { Instruction Opcode = !cast(NAME); bit IsSingle = ps.Pfl.IsSingle; - bit IsInvalidSingleUseConsumer = ps.Pfl.IsInvalidSingleUseConsumer; - bit IsInvalidSingleUseProducer = ps.Pfl.IsInvalidSingleUseProducer; } class VOP3_Real : @@ -844,9 +838,6 @@ class VOP_DPP_Pseudo pattern=[], let Constraints = !if(P.NumSrcArgs, P.TieRegDPP # " = $vdst", ""); let DisableEncoding = !if(P.NumSrcArgs, P.TieRegDPP, ""); let DecoderNamespace = "GFX8"; - - let IsInvalidSingleUseConsumer = !not(VINTERP); - let IsInvalidSingleUseProducer = !not(VINTERP); } class VOP3_DPP_Pseudo : @@ -1714,13 +1705,4 @@ def VOPTrue16Table : GenericTable { let PrimaryKey = ["Opcode"]; let PrimaryKeyName = "getTrue16OpcodeHelper"; -} - -def SingleUseExceptionTable : GenericTable { - let FilterClass = "VOP_Pseudo"; - let CppTypeName = "SingleUseExceptionInfo"; - let Fields = ["Opcode", "IsInvalidSingleUseConsumer", "IsInvalidSingleUseProducer"]; - - let PrimaryKey = ["Opcode"]; - let PrimaryKeyName = "getSingleUseExceptionHelper"; -} +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir deleted file mode 100644 index 9e65ce329df431..00000000000000 --- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir +++ /dev/null @@ -1,1420 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -verify-machineinstrs -run-pass=amdgpu-insert-single-use-vdst %s -o - | FileCheck %s - -# One single-use producer. ---- -name: one_producer -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: one_producer - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr2 -... - -# One single-use producer of a 64-bit value. ---- -name: one_producer_64bit -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: one_producer_64bit - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0_vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr4_vgpr5 - bb.0: - liveins: $vgpr0_vgpr1 - $vgpr2_vgpr3 = V_LSHLREV_B64_e64 0, $vgpr0_vgpr1, implicit $exec - $vgpr4_vgpr5 = V_MOV_B64_e64 $vgpr2_vgpr3, implicit $exec - bb.1: - liveins: $vgpr4_vgpr5 -... - -# Two consecutive single-use producers. ---- -name: two_producers -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: two_producers - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 2 - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr3 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - $vgpr3 = V_ADD_U32_e32 $vgpr0, $vgpr2, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr3 -... - -# Redefinitions of v0. ---- -name: redefinitions -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: redefinitions - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 4 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: -... - -# One producer with no consumers. ---- -name: no_consumer -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: no_consumer - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - bb.0: - liveins: $vgpr0 - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - bb.1: -... - -# One consumer with two uses of the same value. ---- -name: one_consumer_two_uses -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: one_consumer_two_uses - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr2 -... - -# A longer example. ---- -name: longer_example -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: longer_example - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 274 - ; CHECK-NEXT: $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode - ; CHECK-NEXT: $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode - ; CHECK-NEXT: $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode - ; CHECK-NEXT: $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode - ; CHECK-NEXT: $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode - ; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode - ; CHECK-NEXT: $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode - ; CHECK-NEXT: $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode - ; CHECK-NEXT: $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode - ; CHECK-NEXT: $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr16, $vgpr18 - bb.0: - liveins: $vgpr3, $vgpr5, $sgpr0, $sgpr2, $sgpr4, $sgpr5, $sgpr16, $sgpr17, $sgpr18, $sgpr19 - $vgpr14 = V_MUL_F32_e32 $sgpr4, $vgpr3, implicit $exec, implicit $mode - $sgpr3 = S_MUL_F16 $sgpr0, $sgpr2, implicit $mode - $vgpr15 = V_MUL_F32_e32 $sgpr5, $vgpr3, implicit $exec, implicit $mode - $vgpr17 = V_FMA_F32_e64 0, $sgpr16, 0, $vgpr5, 0, $vgpr14, 0, 0, implicit $exec, implicit $mode - $sgpr1 = S_ADD_F16 $sgpr0, 15360, implicit $mode - $vgpr15 = V_FMA_F32_e64 0, $sgpr17, 0, $vgpr5, 0, $vgpr15, 0, 0, implicit $exec, implicit $mode - $vgpr14 = V_FMA_F32_e64 0, $sgpr18, 0, $vgpr15, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode - $vgpr15 = V_FMA_F32_e64 0, $sgpr19, 0, $vgpr14, 0, $vgpr17, 0, 0, implicit $exec, implicit $mode - $vgpr16 = V_LOG_F32_e32 $vgpr15, implicit $exec, implicit $mode - $vgpr18 = V_EXP_F32_e32 $vgpr15, implicit $exec, implicit $mode - bb.1: - liveins: $vgpr16, $vgpr18 -... - -# Multiple uses of v0. ---- -name: multiple_uses_1 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: multiple_uses_1 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1, $vgpr2 -... - -# Multiple uses of v0 and redefinitions of v1 and v2. ---- -name: multiple_uses_2 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: multiple_uses_2 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 2 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1, $vgpr2 -... - -# Multiple uses of all but v1. ---- -name: multiple_uses_3 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: multiple_uses_3 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr2, $vgpr3 - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr1, implicit $exec - bb.1: - liveins: $vgpr2, $vgpr3 -... - -# Second use is an instruction that reads and writes v1. ---- -name: multiple_uses_4 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: multiple_uses_4 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr1, $vgpr2 -... - -# Results are live-in to another basic block. ---- -name: basic_block_1 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: basic_block_1 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr1, $vgpr2 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.2: - liveins: $vgpr1, $vgpr2 -... - -# Result v2 has multiple uses in another basic block. ---- -name: basic_block_2 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: basic_block_2 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr3 - bb.0: - liveins: $vgpr0, $vgpr1 - $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec - bb.1: - liveins: $vgpr2 - $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - bb.2: - liveins: $vgpr3 -... - -# Results are redefined in another basic block. ---- -name: basic_block_3 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: basic_block_3 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr1 - $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec - bb.2: - liveins: $vgpr0, $vgpr1, $vgpr2 -... - -# Exec modified between producer and consumer. ---- -name: exec_mask -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: exec_mask - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr0_sgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $exec = COPY $sgpr0_sgpr1 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $sgpr0_sgpr1 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $exec = COPY $sgpr0_sgpr1 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr0 -... - -# Exec_lo modified between producer and consumer. ---- -name: exec_mask_lo -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: exec_mask_lo - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $exec_lo = COPY $sgpr0 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $sgpr0 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $exec_lo = COPY $sgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr0 -... - -# Exec_hi modified between producer and consumer. ---- -name: exec_mask_hi -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: exec_mask_hi - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $sgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $exec_hi = COPY $sgpr0 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $sgpr0 - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $exec_hi = COPY $sgpr0 - $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr0 -... - -# Write 32-bit vgpr and then read from low 16 bits. ---- -name: write_full_read_lo -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_full_read_lo - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1_lo16 - bb.0: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec - bb.1: - liveins: $vgpr1_lo16 -... - -# Write 32-bit vgpr and then read from high 16 bits. ---- -name: write_full_read_hi -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_full_read_hi - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1_hi16 - bb.0: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec - bb.1: - liveins: $vgpr1_hi16 -... - -# Write 32-bit vgpr and then read from both halves. ---- -name: write_full_read_both -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_full_read_both - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec - ; CHECK-NEXT: $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1 - bb.0: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $vgpr1_lo16 = V_MOV_B16_t16_e32 $vgpr0_lo16, implicit $exec - $vgpr1_hi16 = V_MOV_B16_t16_e32 $vgpr0_hi16, implicit $exec - bb.1: - liveins: $vgpr1 -... - -# Write 32-bit vgpr and then read from both halves in the same instruction. ---- -name: write_full_read_both_same_instruction -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_full_read_both_same_instruction - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1_lo16 - bb.0: - $vgpr0 = V_MOV_B32_e32 0, implicit $exec - $vgpr1_lo16 = V_ADD_F16_t16_e32 $vgpr0_lo16, $vgpr0_hi16, implicit $mode, implicit $exec - bb.1: - liveins: $vgpr1_lo16 -... - -# Write low 16-bits and then read 32-bit vgpr. ---- -name: write_lo_read_full -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_lo_read_full - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1 - bb.0: - liveins: $vgpr0 - $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1 -... - -# Write low 16-bits and then read 32-bit vgpr twice. ---- -name: write_lo_read_full_twice -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_lo_read_full_twice - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1, $vgpr2 -... - -# Write high 16-bits and then read 32-bit vgpr. ---- -name: write_hi_read_full -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_hi_read_full - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1 - bb.0: - liveins: $vgpr0 - $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1 -... - -# Write high 16-bits and then read 32-bit vgpr twice. ---- -name: write_hi_read_full_twice -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_hi_read_full_twice - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - liveins: $vgpr0 - $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1, $vgpr2 -... - -# Write low 16-bits and then write high 16-bits and then read 32-bit vgpr. ---- -name: write_both_read_full -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_both_read_full - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 2 - ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1 - bb.0: - $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1 -... - -# Write low 16-bits and then write high 16-bits and then read 32-bit vgpr twice. ---- -name: write_both_read_full_twice -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: write_both_read_full_twice - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1, $vgpr2 - bb.0: - $vgpr0_lo16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr0_hi16 = V_MOV_B16_t16_e32 0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1, $vgpr2 -... - -# Three single use producer instructions with non single use producer -# instructions in between. ---- -name: three_producers_with_two_skips -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: three_producers_with_two_skips - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 9361 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr2, $vgpr4 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr2, $vgpr4 -... - -# Six single use producer instructions with non single use producer -# instructions in between. ---- -name: six_producers_with_four_skips -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: six_producers_with_four_skips - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 145 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 9362 - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr2, $vgpr4, $vgpr7, $vgpr9 -... - -# Five single use producer instructions, followed by -# four non single use producers, followed by -# three single use producer instructions, followed by -# two non single use producers, followed by -# one single use producer instructions. ---- -name: immediate_order -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: immediate_order - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 10693 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr13, $vgpr14 -... - -# Maximum number of single use producers that can be encoded in a single -# instruction. ---- -name: maximum_producers_single_instruction -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: maximum_producers_single_instruction - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 58255 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: -... - -# One more than the maximum number of single use producers that can be encoded -# in a single instruction. ---- -name: too_many_producers_single_instruction -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: too_many_producers_single_instruction - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 58255 - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - - - - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: -... - -# Maximum distance between single use producers that can be encoded in a single -# instruction. ---- -name: maximum_skips_single_instruction -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: maximum_skips_single_instruction - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 15473 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15 -... - -# One more than the maximum distance between single use producers that can be -# encoded in a single instruction. ---- -name: too_many_skips_single_instruction -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: too_many_skips_single_instruction - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16 -... - - -# Maximum possible encoding value with all bits of the immediate set ---- -name: all_immediate_bits_set -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: all_immediate_bits_set - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 65535 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr5 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr6 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr7 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr8 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr9 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr10 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr11 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr12 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr13 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr14 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr15 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr17 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr18 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr19 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr20 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr21 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr22 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr23 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr24 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr25 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr26 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr27 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr28 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr29 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr30 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr31 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr32 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr33 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr34 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr35 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr36 = V_MOV_B32_e32 $vgpr0, implicit $exec - - $vgpr37 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr38 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr39 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr40 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr41 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr42 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr43 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr30, $vgpr31, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36 - -... - -# Tests for multi-cycle instructions that are explicitly excluded. - -# Valid producers but invalid consumer opcodes. ---- -name: v_mul_hi_u32_e64 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: v_mul_hi_u32_e64 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0, $vgpr3 - bb.0: - liveins: $vgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr2 = V_MUL_HI_U32_e64 $vgpr0, $vgpr1, implicit $exec - $vgpr3 = V_MOV_B32_e32 $vgpr2, implicit $exec - bb.1: - liveins: $vgpr0, $vgpr3 -... - ---- -name: v_cmpx_t_u64_e64 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: v_cmpx_t_u64_e64 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $sgpr0 = V_CMPX_EQ_U64_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, implicit-def $exec, implicit $exec - S_BRANCH %bb.1 - bb.1: - liveins: $vgpr0 -... - ---- -name: v_lshlrev_b64_e64 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: v_lshlrev_b64_e64 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0_vgpr1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec - ; CHECK-NEXT: $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr4_vgpr5 - bb.0: - liveins: $vgpr0_vgpr1 - $vgpr2_vgpr3 = V_MOV_B64_e64 $vgpr0_vgpr1, implicit $exec - $vgpr4_vgpr5 = V_LSHLREV_B64_e64 0, $vgpr2_vgpr3, implicit $exec - $vgpr6_vgpr7 = V_LSHLREV_B64_e64 0, $vgpr4_vgpr5, implicit $exec - bb.1: - liveins: $vgpr4_vgpr5 -... - -# Invalid producers but valid consumer opcodes. ---- -name: v_movereld_b32_e32 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: v_movereld_b32_e32 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vgpr2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $m0 = S_MOV_B32 0 - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4) - ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr3 - bb.0: - liveins: $vgpr0, $vgpr2 - $m0 = S_MOV_B32 0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - V_MOVRELD_B32_e32 $vgpr2, $vgpr1, implicit $m0, implicit $exec, implicit-def $vgpr1_vgpr2, implicit undef $vgpr1_vgpr2(tied-def 4) - $vgpr3 = V_ADD_U32_e32 $vgpr2, $vgpr1, implicit $exec - bb.1: - liveins: $vgpr3 -... - -# Invalid producers and invalid consumer opcodes. ---- -name: v_writelane_b32 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: v_writelane_b32 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $sgpr0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1 - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $vgpr0, $sgpr0 - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - $vgpr1 = V_WRITELANE_B32 $sgpr0, 0, $vgpr1 - $vgpr2 = V_MOV_B32_e32 $vgpr1, implicit $exec - bb.1: - liveins: $vgpr0 -... - -# DPP instructions cannot be single use producers or consumers ---- -name: V_ADD_NC_U32_dpp -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: V_ADD_NC_U32_dpp - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr0, $vcc - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec - ; CHECK-NEXT: $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr0 - bb.0: - liveins: $vgpr0, $vcc - $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec - $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec - $vgpr0 = V_ADDC_U32_dpp $vgpr0, $vgpr0, $vgpr0, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec - bb.1: - liveins: $vgpr0 -... - -# Exception to the rule that dpp instructions -# cannot be single use producers or consumers ---- -name: V_INTERP_MOV_F32 -tracksRegLiveness: true -body: | - ; CHECK-LABEL: name: V_INTERP_MOV_F32 - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: S_SINGLEUSE_VDST 1 - ; CHECK-NEXT: $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec - ; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: liveins: $vgpr1 - bb.0: - $vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit $mode, implicit $m0, implicit $exec - $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec - bb.1: - liveins: $vgpr1 -... - diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s deleted file mode 100644 index 044ce48c267846..00000000000000 --- a/llvm/test/MC/AMDGPU/gfx1150_asm_sopp.s +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1150 -show-encoding %s | FileCheck --check-prefixes=GFX1150 %s - -s_singleuse_vdst 0x0000 -// GFX1150: encoding: [0x00,0x00,0x93,0xbf] - -s_singleuse_vdst 0xffff -// GFX1150: encoding: [0xff,0xff,0x93,0xbf] - -s_singleuse_vdst 0x1234 -// GFX1150: encoding: [0x34,0x12,0x93,0xbf] diff --git a/llvm/test/MC/AMDGPU/gfx11_unsupported.s b/llvm/test/MC/AMDGPU/gfx11_unsupported.s index 1e8d7684e942a6..376077c188b46a 100644 --- a/llvm/test/MC/AMDGPU/gfx11_unsupported.s +++ b/llvm/test/MC/AMDGPU/gfx11_unsupported.s @@ -2011,9 +2011,6 @@ s_cmp_neq_f16 s1, s2 s_cmp_nlt_f16 s1, s2 // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU -s_singleuse_vdst 0x1234 -// CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU - buffer_atomic_sub_clamp_u32 v5, off, s[8:11], s3 offset:0 glc // CHECK: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s index e98659208d5a9c..fdcabc4352c69b 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s @@ -69,15 +69,6 @@ s_wait_alu depctr_va_sdst(3) s_wait_alu depctr_va_vdst(14) depctr_va_sdst(6) depctr_vm_vsrc(6) // GFX12: encoding: [0x9b,0xed,0x88,0xbf] -s_singleuse_vdst 0x0000 -// GFX12: encoding: [0x00,0x00,0x93,0xbf] - -s_singleuse_vdst 0xffff -// GFX12: encoding: [0xff,0xff,0x93,0xbf] - -s_singleuse_vdst 0x1234 -// GFX12: encoding: [0x34,0x12,0x93,0xbf] - s_barrier_wait 0xffff // GFX12: encoding: [0xff,0xff,0x94,0xbf] diff --git a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt index d6e8b7ee2f01f0..f819a61949b577 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/decode-err.txt @@ -1,16 +1,11 @@ # RUN: llvm-mc -triple=amdgcn -mcpu=gfx900 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GCN-ERR %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W32 %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -disassemble -show-encoding < %s 2>&1 | FileCheck -check-prefixes=W64 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s # RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -disassemble -show-encoding -filetype=null < %s 2>&1 | FileCheck -check-prefix=GFX12-ERR %s # GCN-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding 0xdf,0x00,0x00,0x02 -# this is s_singleuse_vdst 0x1234, which is only valid on gfx1150 -# GFX11-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding -0x34,0x12,0x93,0xbf - # this is s_waitcnt_vscnt exec_hi, 0x1234, which is valid on gfx11, but not on gfx12 # GFX12-ERR: [[@LINE+1]]:1: warning: invalid instruction encoding 0x34,0x12,0x7f,0xbc diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt deleted file mode 100644 index 8fa266a73ff87f..00000000000000 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_sopp.txt +++ /dev/null @@ -1,10 +0,0 @@ -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1150 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX1150 %s - -# GFX1150: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf] -0x00,0x00,0x93,0xbf - -# GFX1150: s_singleuse_vdst 0xffff ; encoding: [0xff,0xff,0x93,0xbf] -0xff,0xff,0x93,0xbf - -# GFX1150: s_singleuse_vdst 0x1234 ; encoding: [0x34,0x12,0x93,0xbf] -0x34,0x12,0x93,0xbf diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt index d42f920aa61dd7..d69801512c0786 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt @@ -60,14 +60,6 @@ # GFX12: s_wait_storecnt_dscnt 0xc1d1 ; encoding: [0xd1,0xc1,0xc9,0xbf] 0xd1,0xc1,0xc9,0xbf -# GFX12: s_singleuse_vdst 0x0 ; encoding: [0x00,0x00,0x93,0xbf] -0x00,0x00,0x93,0xbf - -# GFX12: s_singleuse_vdst 0xffff ; encoding: [0xff,0xff,0x93,0xbf] -0xff,0xff,0x93,0xbf - -# GFX12: s_singleuse_vdst 0x1234 ; encoding: [0x34,0x12,0x93,0xbf] -0x34,0x12,0x93,0xbf # GFX12: s_barrier_wait 0xffff ; encoding: [0xff,0xff,0x94,0xbf] 0xff,0xff,0x94,0xbf diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn index dd4af4e98832f7..f83efbd3558025 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AMDGPU/BUILD.gn @@ -152,7 +152,6 @@ static_library("LLVMAMDGPUCodeGen") { "AMDGPUISelLowering.cpp", "AMDGPUImageIntrinsicOptimizer.cpp", "AMDGPUInsertDelayAlu.cpp", - "AMDGPUInsertSingleUseVDST.cpp", "AMDGPUInstCombineIntrinsic.cpp", "AMDGPUInstrInfo.cpp", "AMDGPUInstructionSelector.cpp",