diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 29935bb8046ff..c6c4a3e2a4e1e 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -223,6 +223,16 @@ def verify_stop_exception_info(self, expected_description): return True return False + def verify_stop_on_entry(self) -> None: + """Waits for the process to be stopped and then verifies at least one + thread has the stop reason 'entry'.""" + self.dap_server.wait_for_stopped() + self.assertIn( + "entry", + (t["reason"] for t in self.dap_server.thread_stop_reasons.values()), + "Expected at least one thread to report stop reason 'entry' in {self.dap_server.thread_stop_reasons}", + ) + def verify_commands(self, flavor: str, output: str, commands: list[str]): self.assertTrue(output and len(output) > 0, "expect console output") lines = output.splitlines() diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index d90108f687f84..36dee1470e0a2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -22,7 +22,6 @@ #include "lldb/Utility/Stream.h" #include "lldb/Utility/Timer.h" #include "lldb/lldb-private-enumerations.h" -#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ThreadPool.h" #include #include @@ -33,10 +32,10 @@ using namespace lldb_private::plugin::dwarf; using namespace llvm::dwarf; void ManualDWARFIndex::Index() { - if (m_indexed) - return; - m_indexed = true; + std::call_once(m_indexed_flag, [this]() { IndexImpl(); }); +} +void ManualDWARFIndex::IndexImpl() { ElapsedTime elapsed(m_index_time); LLDB_SCOPED_TIMERF("%p", static_cast(m_dwarf)); if (LoadFromCache()) { diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h index 0b5b2f3e84309..41e0e620a4896 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h @@ -66,8 +66,14 @@ class ManualDWARFIndex : public DWARFIndex { void Dump(Stream &s) override; private: + /// Reads the DWARF debug info to build the index once. + /// + /// Should be called before attempting to retrieve symbols. void Index(); + /// Call `ManualDWARFIndex::Index()` instead. + void IndexImpl(); + /// Decode a serialized version of this object from data. /// /// \param data @@ -170,7 +176,7 @@ class ManualDWARFIndex : public DWARFIndex { llvm::DenseSet m_type_sigs_to_avoid; IndexSet m_set; - bool m_indexed = false; + std::once_flag m_indexed_flag; }; } // namespace dwarf } // namespace lldb_private::plugin diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py index 83faf276852f8..e8e07e1e86fc4 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart.py @@ -51,20 +51,8 @@ def test_stopOnEntry(self): self.build_and_launch(program, stopOnEntry=True) [bp_main] = self.set_function_breakpoints(["main"]) - self.dap_server.request_configurationDone() - self.dap_server.wait_for_stopped() - # Once the "configuration done" event is sent, we should get a stopped - # event immediately because of stopOnEntry. 
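The ManualDWARFIndex change above swaps a plain `m_indexed` bool for a `std::once_flag`. As a minimal, self-contained sketch of that idiom (illustrative class names, not lldb's actual types): `std::call_once` both serializes the first call and blocks every later caller until the one-time work finishes, which a bare bool cannot guarantee when threads race into `Index()`.

```cpp
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

// Sketch of the once-only initialization idiom: IndexImpl()-style work
// runs exactly once, even when several threads call Ensure() at once.
class Index {
public:
  void Ensure() {
    std::call_once(m_indexed_flag, [this] { BuildIndex(); });
  }

private:
  void BuildIndex() {
    // Expensive one-time work; runs on exactly one thread, and all
    // other callers of Ensure() wait here until it completes.
    std::cout << "indexing once\n";
  }
  std::once_flag m_indexed_flag;
};

int main() {
  Index idx;
  std::vector<std::thread> threads;
  for (int i = 0; i < 8; ++i)
    threads.emplace_back([&idx] { idx.Ensure(); });
  for (auto &t : threads)
    t.join();
  return 0; // prints "indexing once" exactly once
}
```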
- self.assertTrue( - len(self.dap_server.thread_stop_reasons) > 0, - "expected stopped event during launch", - ) - for _, body in self.dap_server.thread_stop_reasons.items(): - if "reason" in body: - reason = body["reason"] - self.assertNotEqual( - reason, "breakpoint", 'verify stop isn\'t "main" breakpoint' - ) + self.continue_to_next_stop() + self.verify_stop_on_entry() # Then, if we continue, we should hit the breakpoint at main. self.continue_to_breakpoints([bp_main]) @@ -73,17 +61,7 @@ def test_stopOnEntry(self): # main. resp = self.dap_server.request_restart() self.assertTrue(resp["success"]) - stopped_events = self.dap_server.wait_for_stopped() - for stopped_event in stopped_events: - if "body" in stopped_event: - body = stopped_event["body"] - if "reason" in body: - reason = body["reason"] - self.assertNotEqual( - reason, - "breakpoint", - 'verify stop after restart isn\'t "main" breakpoint', - ) + self.verify_stop_on_entry() @skipIfWindows def test_arguments(self): diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py index e1ad1425a993d..7d4949907df0d 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py @@ -11,27 +11,6 @@ @skipIfBuildType(["debug"]) class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): - def verify_stopped_on_entry(self, stopped_events: List[Dict[str, Any]]): - seen_stopped_event = 0 - for stopped_event in stopped_events: - body = stopped_event.get("body") - if body is None: - continue - - reason = body.get("reason") - if reason is None: - continue - - self.assertNotEqual( - reason, - "breakpoint", - 'verify stop after restart isn\'t "main" breakpoint', - ) - if reason == "entry": - seen_stopped_event += 1 - - self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.") - @skipIfAsan @skipIfWindows @skipIf(oslist=["linux"], archs=["arm$"]) # Always times out on buildbot @@ -92,11 +71,8 @@ def test_stopOnEntry(self): self.build_and_launch(program, console="integratedTerminal", stopOnEntry=True) [bp_main] = self.set_function_breakpoints(["main"]) - self.dap_server.request_continue() # sends configuration done - stopped_events = self.dap_server.wait_for_stopped() - # We should be stopped at the entry point. - self.assertGreaterEqual(len(stopped_events), 0, "expect stopped events") - self.verify_stopped_on_entry(stopped_events) + self.dap_server.request_configurationDone() + self.verify_stop_on_entry() # Then, if we continue, we should hit the breakpoint at main. self.dap_server.request_continue() @@ -105,8 +81,7 @@ def test_stopOnEntry(self): # Restart and check that we still get a stopped event before reaching # main. self.dap_server.request_restart() - stopped_events = self.dap_server.wait_for_stopped() - self.verify_stopped_on_entry(stopped_events) + self.verify_stop_on_entry() # continue to main self.dap_server.request_continue() diff --git a/lldb/tools/lldb-dap/CMakeLists.txt b/lldb/tools/lldb-dap/CMakeLists.txt index 7db334ca56bcf..dd1bbbdddfc59 100644 --- a/lldb/tools/lldb-dap/CMakeLists.txt +++ b/lldb/tools/lldb-dap/CMakeLists.txt @@ -1,9 +1,6 @@ # We need to include the llvm components we depend on manually, as liblldb does # not re-export those. 
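For reference, the check the new `verify_stop_on_entry()` helper performs in the tests above amounts to an any-of scan over per-thread stop reasons. A rough C++ analogue, with illustrative names rather than the lldb-dap API:

```cpp
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <map>
#include <string>

// Returns true if at least one thread reports the stop reason "entry",
// mirroring what the Python helper asserts after wait_for_stopped().
static bool HasEntryStop(const std::map<uint64_t, std::string> &stop_reasons) {
  return std::any_of(stop_reasons.begin(), stop_reasons.end(),
                     [](const auto &kv) { return kv.second == "entry"; });
}

int main() {
  std::map<uint64_t, std::string> reasons = {{1, "entry"}, {2, "breakpoint"}};
  assert(HasEntryStop(reasons));
  return 0;
}
```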
set(LLVM_LINK_COMPONENTS Support) -set(LLVM_TARGET_DEFINITIONS Options.td) -tablegen(LLVM Options.inc -gen-opt-parser-defs) -add_public_tablegen_target(LLDBDAPOptionsTableGen) add_lldb_library(lldbDAP Breakpoint.cpp diff --git a/lldb/tools/lldb-dap/EventHelper.cpp b/lldb/tools/lldb-dap/EventHelper.cpp index c5d5f2bb59b42..12d9e21c52ab3 100644 --- a/lldb/tools/lldb-dap/EventHelper.cpp +++ b/lldb/tools/lldb-dap/EventHelper.cpp @@ -176,7 +176,7 @@ llvm::Error SendThreadStoppedEvent(DAP &dap, bool on_entry) { llvm::DenseSet old_thread_ids; old_thread_ids.swap(dap.thread_ids); - uint32_t stop_id = process.GetStopID(); + uint32_t stop_id = on_entry ? 0 : process.GetStopID(); const uint32_t num_threads = process.GetNumThreads(); // First make a pass through the threads to see if the focused thread diff --git a/lldb/tools/lldb-dap/JSONUtils.cpp b/lldb/tools/lldb-dap/JSONUtils.cpp index 2780a5b7748e8..1a3a6701b194d 100644 --- a/lldb/tools/lldb-dap/JSONUtils.cpp +++ b/lldb/tools/lldb-dap/JSONUtils.cpp @@ -711,7 +711,7 @@ llvm::json::Value CreateThreadStopped(DAP &dap, lldb::SBThread &thread, break; } if (stop_id == 0) - body.try_emplace("reason", "entry"); + body["reason"] = "entry"; const lldb::tid_t tid = thread.GetThreadID(); body.try_emplace("threadId", (int64_t)tid); // If no description has been set, then set it to the default thread stopped diff --git a/lldb/tools/lldb-dap/tool/CMakeLists.txt b/lldb/tools/lldb-dap/tool/CMakeLists.txt index b39a4ed9c40e7..5335d25c5d450 100644 --- a/lldb/tools/lldb-dap/tool/CMakeLists.txt +++ b/lldb/tools/lldb-dap/tool/CMakeLists.txt @@ -1,3 +1,7 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(LLDBDAPOptionsTableGen) + add_lldb_tool(lldb-dap lldb-dap.cpp diff --git a/lldb/tools/lldb-dap/Options.td b/lldb/tools/lldb-dap/tool/Options.td similarity index 100% rename from lldb/tools/lldb-dap/Options.td rename to lldb/tools/lldb-dap/tool/Options.td diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 039d61624093d..0dba9412564d4 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -225,7 +225,7 @@ what to add to your calendar invite. 
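The JSONUtils.cpp hunk earlier in this patch switches the "entry" reason from `try_emplace` to assignment. The distinction matters: `try_emplace` is a no-op when the key already exists, so a reason recorded earlier in the function would win; assignment overwrites it. A small sketch using `std::map` to stand in for `llvm::json::Object`:

```cpp
#include <iostream>
#include <map>
#include <string>

// try_emplace keeps an existing value; operator[] assignment replaces
// it. The DAP code needs the latter so "entry" overrides any reason
// already stored for the first stop.
int main() {
  std::map<std::string, std::string> body;
  body.try_emplace("reason", "breakpoint"); // earlier code set a reason
  body.try_emplace("reason", "entry");      // no effect: key exists
  std::cout << body["reason"] << "\n";      // prints "breakpoint"
  body["reason"] = "entry";                 // assignment overwrites
  std::cout << body["reason"] << "\n";      // prints "entry"
  return 0;
}
```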
- * - GlobalISel - Every 2nd Tuesday of the month - - `gcal `__ + - `gcal `__ - `Meeting details/agenda `__ diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 581b4ad161daa..c8196d8a7ef48 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -90,7 +90,6 @@ LLVM_ABI void initializeDSELegacyPassPass(PassRegistry &); LLVM_ABI void initializeDXILMetadataAnalysisWrapperPassPass(PassRegistry &); LLVM_ABI void initializeDXILMetadataAnalysisWrapperPrinterPass(PassRegistry &); LLVM_ABI void initializeDXILResourceBindingWrapperPassPass(PassRegistry &); -LLVM_ABI void initializeDXILResourceImplicitBindingLegacyPass(PassRegistry &); LLVM_ABI void initializeDXILResourceTypeWrapperPassPass(PassRegistry &); LLVM_ABI void initializeDXILResourceWrapperPassPass(PassRegistry &); LLVM_ABI void initializeDeadMachineInstructionElimPass(PassRegistry &); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b1776eaae6e86..44e5a187c4281 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2871,18 +2871,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); // Because the first two arguments are guaranteed legal. SmallVector NewOps(N->ops()); - SDValue Operand = N->getOperand(OpNo); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); - NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { assert(OpNo >= 7); SmallVector NewOps(N->ops()); - SDValue Operand = N->getOperand(OpNo); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); - NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + NewOps[OpNo] = GetPromotedInteger(NewOps[OpNo]); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td index d95013123aced..65dce74a1e894 100644 --- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td +++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td @@ -2116,8 +2116,10 @@ class VIMAGE_TENSOR_Real op, VIMAGE_TENSOR_Pseudo ps, string opName = p let vaddr2 = !if(ps.UpTo2D, !cast(SGPR_NULL_gfx11plus.HWEncoding), ?); let vaddr3 = !if(ps.UpTo2D, !cast(SGPR_NULL_gfx11plus.HWEncoding), ?); + // Set VADDR4 to NULL + let vaddr4 = !cast(SGPR_NULL_gfx11plus.HWEncoding); + // set to 0 based on SPG. 
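Regarding the LegalizeIntegerTypes.cpp change earlier in this patch: `GetPromotedInteger` reuses the value the legalizer already produced for the operand instead of any-extending the original, and integer constants are typically promoted by sign-extension. One plausible reading of the stackmap test updates below (`.word 65535` becoming `.word -1`) follows from that difference; plain integer ops stand in for SelectionDAG nodes here:

```cpp
#include <cstdint>
#include <iostream>

// Any-extending an i16 -1 into 32 bits may materialize 0x0000FFFF,
// while the promoted (sign-extended) value is 0xFFFFFFFF, i.e. -1.
int main() {
  int16_t small = -1;
  uint32_t any_extended = static_cast<uint16_t>(small); // high bits zeroed
  int32_t sign_promoted = small;                        // sign-extended
  std::cout << any_extended << "\n";                    // 65535
  std::cout << sign_promoted << "\n";                   // -1
  return 0;
}
```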
- let vaddr4 = 0; let rsrc = 0; let vdata = 0; let d16 = 0; diff --git a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp index 15def3637c5a7..b6bbb201f5c5d 100644 --- a/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp +++ b/llvm/lib/Target/DirectX/DirectXAsmPrinter.cpp @@ -52,6 +52,7 @@ void DXILAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { emitGlobalConstant(GV->getDataLayout(), GV->getInitializer()); } -extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXAsmPrinter() { +extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void +LLVMInitializeDirectXAsmPrinter() { RegisterAsmPrinter X(getTheDirectXTarget()); } diff --git a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp index bcf84403b2c0d..84b1a313df2ea 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetMachine.cpp @@ -53,7 +53,8 @@ using namespace llvm; -extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() { +extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void +LLVMInitializeDirectXTarget() { RegisterTargetMachine X(getTheDirectXTarget()); auto *PR = PassRegistry::getPassRegistry(); initializeDXILIntrinsicExpansionLegacyPass(*PR); diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp index 9a14c01f62ae7..62ad014f3739f 100644 --- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp +++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp @@ -132,7 +132,8 @@ static MCRegisterInfo *createDirectXMCRegisterInfo(const Triple &Triple) { static MCInstrInfo *createDirectXMCInstrInfo() { return new MCInstrInfo(); } -extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetMC() { +extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void +LLVMInitializeDirectXTargetMC() { Target &T = getTheDirectXTarget(); RegisterMCAsmInfo X(T); TargetRegistry::RegisterMCInstrInfo(T, createDirectXMCInstrInfo); diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp index ae01626e5229d..934bd1b0e8adb 100644 --- a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp +++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp @@ -24,7 +24,8 @@ Target &getTheDirectXTarget() { using namespace llvm; -extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTargetInfo() { +extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void +LLVMInitializeDirectXTargetInfo() { RegisterTarget X( getTheDirectXTarget(), "dxil", "DirectX Intermediate Language", "DXIL"); } diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index a6de839de7c28..904aabed9a843 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -371,6 +371,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, ISD::SETUGE, ISD::SETUGT}, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); } setOperationAction(ISD::CTPOP, GRLenVT, Legal); setOperationAction(ISD::FCEIL, {MVT::f32, MVT::f64}, Legal); @@ -453,6 +457,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, ISD::SETUGE, 
ISD::SETUGT}, VT, Expand); setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::FROUNDEVEN, VT, Legal); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ca4ee5f89573a..610ba052fbdd5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2424,6 +2424,12 @@ def : Pat<(int_loongarch_lasx_xvpickve_w_f v8f32:$xj, timm:$imm), def : Pat<(int_loongarch_lasx_xvpickve_d_f v4f64:$xj, timm:$imm), (XVPICKVE_D v4f64:$xj, (to_valid_timm timm:$imm))>; +// Vector floating-point conversion +defm : PatXrF; +defm : PatXrF; +defm : PatXrF; +defm : PatXrF; + // load def : Pat<(int_loongarch_lasx_xvld GPR:$rj, timm:$imm), (XVLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 92402baa0fa0f..64708421c4ed4 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -2552,6 +2552,11 @@ def : Pat<(f64 (froundeven FPR64:$fj)), (f64 (EXTRACT_SUBREG (VFRINTRNE_D (VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)), sub_64))>; +defm : PatVrF; +defm : PatVrF; +defm : PatVrF; +defm : PatVrF; + // load def : Pat<(int_loongarch_lsx_vld GPR:$rj, timm:$imm), (VLD GPR:$rj, (to_valid_timm timm:$imm))>; diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 6addcfab15125..cbc604e87cf1a 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5956,7 +5956,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, } // Update weight for the newly-created conditional branch. - if (hasBranchWeightMD(*SI)) { + if (hasBranchWeightMD(*SI) && NewBI->isConditional()) { SmallVector Weights; getBranchWeights(SI, Weights); if (Weights.size() == 1 + SI->getNumCases()) { diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1b55a3b235228..34b405ced8c0a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote( {VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(), VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()}); + if (E.isAltShuffle()) { + // Combining these opcodes may lead to incorrect analysis, skip for now. + auto IsDangerousOpcode = [](unsigned Opcode) { + switch (Opcode) { + case Instruction::Shl: + case Instruction::AShr: + case Instruction::LShr: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + return true; + default: + break; + } + return false; + }; + if (IsDangerousOpcode(E.getAltOpcode())) + return FinalAnalysis(); + } + switch (E.getOpcode()) { // We can always demote truncations and extensions. 
Since truncations can diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1f10058ab4a9a..1504acfcf7e52 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4109,6 +4109,12 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase { const VPCanonicalIVPHIRecipe *getCanonicalIV() const { return const_cast(this)->getCanonicalIV(); } + + /// Return the type of the canonical IV for loop regions. + Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); } + const Type *getCanonicalIVType() const { + return getCanonicalIV()->getScalarType(); + } }; inline VPRegionBlock *VPRecipeBase::getRegion() { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bde62dd6dd4bc..f9c15a31167fa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -2372,9 +2372,8 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const { return false; auto *StepC = dyn_cast(getStepValue()->getLiveInIRValue()); auto *StartC = dyn_cast(getStartValue()->getLiveInIRValue()); - auto *CanIV = getRegion()->getCanonicalIV(); return StartC && StartC->isZero() && StepC && StepC->isOne() && - getScalarType() == CanIV->getScalarType(); + getScalarType() == getRegion()->getCanonicalIVType(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 986c801abf684..d491d5669ef18 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -820,7 +820,7 @@ static VPValue *optimizeEarlyExitInductionUser(VPlan &Plan, // Calculate the final index. VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion(); auto *CanonicalIV = LoopRegion->getCanonicalIV(); - Type *CanonicalIVType = CanonicalIV->getScalarType(); + Type *CanonicalIVType = LoopRegion->getCanonicalIVType(); VPBuilder B(cast(PredVPBB)); DebugLoc DL = cast(Op)->getDebugLoc(); @@ -2402,8 +2402,8 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( "index.part.next"); // Create the active lane mask instruction in the VPlan preheader. - VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(TopRegion->getCanonicalIV()->getScalarType(), 1)); + VPValue *ALMMultiplier = + Plan.getOrAddLiveIn(ConstantInt::get(TopRegion->getCanonicalIVType(), 1)); auto *EntryALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC, ALMMultiplier}, DL, "active.lane.mask.entry"); @@ -2503,7 +2503,7 @@ void VPlanTransforms::addActiveLaneMask( } else { VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV); VPValue *ALMMultiplier = Plan.getOrAddLiveIn( - ConstantInt::get(LoopRegion->getCanonicalIV()->getScalarType(), 1)); + ConstantInt::get(LoopRegion->getCanonicalIVType(), 1)); LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask, {WideCanonicalIV, Plan.getTripCount(), ALMMultiplier}, @@ -2775,7 +2775,7 @@ void VPlanTransforms::addExplicitVectorLength( VPBasicBlock *Header = LoopRegion->getEntryBasicBlock(); auto *CanonicalIVPHI = LoopRegion->getCanonicalIV(); - auto *CanIVTy = CanonicalIVPHI->getScalarType(); + auto *CanIVTy = LoopRegion->getCanonicalIVType(); VPValue *StartV = CanonicalIVPHI->getStartValue(); // Create the ExplicitVectorLengthPhi recipe in the main loop. 
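The VPlan.h hunk above adds `getCanonicalIVType()` so call sites stop spelling out `getCanonicalIV()->getScalarType()` at every use. A minimal sketch of that forwarding-accessor pattern, with stand-in types rather than the real VPlan classes:

```cpp
#include <cassert>

struct Type {};

struct CanonicalIV {
  Type *scalar_type;
  Type *getScalarType() const { return scalar_type; }
};

struct Region {
  CanonicalIV *canonical_iv;
  CanonicalIV *getCanonicalIV() const { return canonical_iv; }
  // The new shorthand: one call instead of two at every use site.
  Type *getCanonicalIVType() const {
    return getCanonicalIV()->getScalarType();
  }
};

int main() {
  Type ty;
  CanonicalIV iv{&ty};
  Region region{&iv};
  assert(region.getCanonicalIVType() == iv.getScalarType());
  return 0;
}
```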
@@ -4336,10 +4336,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, VPBuilder PHBuilder(Plan.getVectorPreheader()); VPValue *UF = Plan.getOrAddLiveIn( - ConstantInt::get(CanIV->getScalarType(), 1 * Plan.getUF())); + ConstantInt::get(VectorLoop->getCanonicalIVType(), 1 * Plan.getUF())); if (VF.isScalable()) { VPValue *VScale = PHBuilder.createElementCount( - CanIV->getScalarType(), ElementCount::getScalable(1)); + VectorLoop->getCanonicalIVType(), ElementCount::getScalable(1)); VPValue *VScaleUF = PHBuilder.createNaryOp(Instruction::Mul, {VScale, UF}); Inc->setOperand(1, VScaleUF); Plan.getVF().replaceAllUsesWith(VScale); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index cfd1a741ee841..f15113c6293bc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -69,8 +69,7 @@ class UnrollState { VPBasicBlock::iterator InsertPtForPhi); VPValue *getConstantVPV(unsigned Part) { - Type *CanIVIntTy = - Plan.getVectorLoopRegion()->getCanonicalIV()->getScalarType(); + Type *CanIVIntTy = Plan.getVectorLoopRegion()->getCanonicalIVType(); return Plan.getOrAddLiveIn(ConstantInt::get(CanIVIntTy, Part)); } diff --git a/llvm/test/CodeGen/AArch64/stackmap.ll b/llvm/test/CodeGen/AArch64/stackmap.ll index 995d2545c6359..26221d0c26eb2 100644 --- a/llvm/test/CodeGen/AArch64/stackmap.ll +++ b/llvm/test/CodeGen/AArch64/stackmap.ll @@ -81,14 +81,14 @@ ; CHECK-NEXT: .hword 8 ; CHECK-NEXT: .hword 0 ; CHECK-NEXT: .hword 0 -; CHECK-NEXT: .word 65535 +; CHECK-NEXT: .word -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .hword 8 ; CHECK-NEXT: .hword 0 ; CHECK-NEXT: .hword 0 -; CHECK-NEXT: .word 65535 +; CHECK-NEXT: .word -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 diff --git a/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll new file mode 100644 index 0000000000000..22e4a24435f12 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/umin-sub-to-usubo-select-combine.ll @@ -0,0 +1,236 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s + +define i16 @v_underflow_compare_fold_i16(i16 %a, i16 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u16_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u16_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u16 v0.h, v0.l, v1.l +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u16 v0.l, v0.h, v0.l +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i16 %a, %b + %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) + ret i16 %cond +} + +define i32 @v_underflow_compare_fold_i32(i32 %a, i32 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: 
v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define i32 @v_underflow_compare_fold_i32_commute(i32 %a, i32 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32_commute: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %a, i32 %sub) + ret i32 %cond +} + +define i32 @v_underflow_compare_fold_i32_multi_use(i32 %a, i32 %b, ptr addrspace(1) %ptr) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i32_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v1, v0, v1 +; GFX9-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX9-NEXT: global_store_dword v[2:3], v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i32_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v1, v0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_min_u32_e32 v0, v1, v0 +; GFX11-NEXT: global_store_b32 v[2:3], v1, off +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i32 %a, %b + store i32 %sub, ptr addrspace(1) %ptr + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define i64 @v_underflow_compare_fold_i64(i64 %a, i64 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +define i64 @v_underflow_compare_fold_i64_commute(i64 %a, i64 %b) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64_commute: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[0:1], v[2:3] +; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64_commute: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] +; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v3, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %a, i64 %sub) + ret i64 %cond +} + +define i64 @v_underflow_compare_fold_i64_multi_use(i64 %a, i64 %b, ptr addrspace(1) %ptr) #0 { +; GFX9-LABEL: v_underflow_compare_fold_i64_multi_use: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v3, vcc +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1] +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[2:3], off +; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_underflow_compare_fold_i64_multi_use: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo +; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1] +; GFX11-NEXT: global_store_b64 v[4:5], v[2:3], off +; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_cndmask_b32 v1, v1, v3 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %sub = sub i64 %a, %b + store i64 %sub, ptr addrspace(1) %ptr + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +define amdgpu_ps i16 @s_underflow_compare_fold_i16(i16 inreg %a, i16 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_i32 s1, s0, s1 +; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX9-NEXT: s_and_b32 s1, s1, 0xffff +; GFX9-NEXT: s_min_u32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_underflow_compare_fold_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_i32 s1, s0, s1 +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_and_b32 s1, s1, 0xffff +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_min_u32 s0, s1, s0 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i16 %a, %b + %cond = call i16 @llvm.umin.i16(i16 %sub, i16 %a) + ret i16 %cond +} + +define amdgpu_ps i32 @s_underflow_compare_fold_i32(i32 inreg %a, i32 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_i32 s1, s0, s1 +; GFX9-NEXT: s_min_u32 s0, s1, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: s_underflow_compare_fold_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_i32 s1, s0, s1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_min_u32 s0, s1, s0 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i32 %a, %b + %cond = call i32 @llvm.umin.i32(i32 %sub, i32 %a) + ret i32 %cond +} + +define amdgpu_ps i64 @s_underflow_compare_fold_i64(i64 inreg %a, i64 inreg %b) #0 { +; GFX9-LABEL: s_underflow_compare_fold_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_sub_u32 s2, s0, s2 +; GFX9-NEXT: v_mov_b32_e32 v0, s0 +; GFX9-NEXT: s_subb_u32 s3, s1, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_cmp_lt_u64_e32 vcc, s[2:3], v[0:1] +; GFX9-NEXT: s_and_b64 s[4:5], vcc, exec +; GFX9-NEXT: s_cselect_b32 s1, s3, s1 +; GFX9-NEXT: s_cselect_b32 s0, s2, s0 +; GFX9-NEXT: ; return to shader part epilog +; +; 
GFX11-LABEL: s_underflow_compare_fold_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_sub_u32 s2, s0, s2 +; GFX11-NEXT: s_subb_u32 s3, s1, s3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[2:3], s[0:1] +; GFX11-NEXT: s_and_b32 s4, s4, exec_lo +; GFX11-NEXT: s_cselect_b32 s0, s2, s0 +; GFX11-NEXT: s_cselect_b32 s1, s3, s1 +; GFX11-NEXT: ; return to shader part epilog + %sub = sub i64 %a, %b + %cond = call i64 @llvm.umin.i64(i64 %sub, i64 %a) + ret i64 %cond +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll index 79407c3fd4c8b..fa5f27edf615e 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/fp-rounding.ll @@ -7,38 +7,8 @@ define void @ceil_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrp.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrp.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -52,21 +22,7 @@ define void @ceil_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrp.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -81,38 +37,8 @@ define void @floor_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: 
vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrm.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrm.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -126,21 +52,7 @@ define void @floor_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrm.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -155,38 +67,8 @@ define void @trunc_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrz.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrz.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, 
ptr %a0 @@ -200,21 +82,7 @@ define void @trunc_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrz.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -229,38 +97,8 @@ define void @roundeven_v8f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v8f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 5 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr2, $xr0, 4 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 6 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 48 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: xvpickve.w $xr3, $xr0, 0 -; CHECK-NEXT: vreplvei.w $vr3, $vr3, 0 -; CHECK-NEXT: vfrintrne.s $vr3, $vr3 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 16 -; CHECK-NEXT: xvpickve.w $xr1, $xr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr3, $vr1, 32 -; CHECK-NEXT: xvpickve.w $xr0, $xr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr3, $vr0, 48 -; CHECK-NEXT: xvpermi.q $xr3, $xr2, 2 -; CHECK-NEXT: xvst $xr3, $a0, 0 +; CHECK-NEXT: xvfrintrne.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <8 x float>, ptr %a0 @@ -274,21 +112,7 @@ define void @roundeven_v4f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr2, $xr0, 2 -; CHECK-NEXT: vreplvei.d $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.d $vr2, $vr2 -; CHECK-NEXT: vextrins.d $vr2, $vr1, 16 -; CHECK-NEXT: xvpickve.d $xr1, $xr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: xvpickve.d $xr0, $xr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 -; CHECK-NEXT: xvpermi.q $xr0, $xr2, 2 +; CHECK-NEXT: xvfrintrne.d $xr0, $xr0 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll index 1ca6290a2239b..cb01ac0358ab3 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/fp-rounding.ll @@ -7,22 +7,8 @@ 
define void @ceil_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrp.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -36,13 +22,7 @@ define void @ceil_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrp.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrp.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,22 +37,8 @@ define void @floor_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrm.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -86,13 +52,7 @@ define void @floor_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrm.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrm.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -107,22 +67,8 @@ define void @trunc_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrz.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x 
float>, ptr %a0 @@ -136,13 +82,7 @@ define void @trunc_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrz.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrz.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -157,22 +97,8 @@ define void @roundeven_v4f32(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v4f32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vreplvei.w $vr2, $vr0, 0 -; CHECK-NEXT: vreplvei.w $vr2, $vr2, 0 -; CHECK-NEXT: vfrintrne.s $vr2, $vr2 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 16 -; CHECK-NEXT: vreplvei.w $vr1, $vr0, 2 -; CHECK-NEXT: vreplvei.w $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.s $vr1, $vr1 -; CHECK-NEXT: vextrins.w $vr2, $vr1, 32 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 3 -; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.s $vr0, $vr0 -; CHECK-NEXT: vextrins.w $vr2, $vr0, 48 -; CHECK-NEXT: vst $vr2, $a0, 0 +; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: %v0 = load <4 x float>, ptr %a0 @@ -186,13 +112,7 @@ define void @roundeven_v2f64(ptr %res, ptr %a0) nounwind { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vreplvei.d $vr1, $vr0, 1 -; CHECK-NEXT: vreplvei.d $vr1, $vr1, 0 -; CHECK-NEXT: vfrintrne.d $vr1, $vr1 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 -; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0 ; CHECK-NEXT: vfrintrne.d $vr0, $vr0 -; CHECK-NEXT: vextrins.d $vr0, $vr1, 16 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll index bd8d882cda39b..9dd402d13b8e0 100644 --- a/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll +++ b/llvm/test/CodeGen/MLRegAlloc/dev-mode-extra-features-logging.ll @@ -26,7 +26,7 @@ ; Also, the first eviction problem is significantly less than 300 instructions. Check ; that there is a zero value. ; Note: we're regex-ing some of the opcodes to avoid test flakyness. -; CHECK: instructions: 20,{{([0-9]{4})}},1{{([0-9]{3})}},2{{([0-9]{3})}},{{.*}},0, +; CHECK: instructions: 20,{{([0-9]{4})}},{{([0-9]{4})}},{{([0-9]{4})}},{{.*}},0, ; Only the candidate virtreg and the 10th LR are included in this problem. Make ; sure the other LRs have values of zero. There are 2700 0s followed by some 1s. ; There's a limit to how many repetitions can be matched. 
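The LoongArch test diffs above collapse long per-element sequences into single `xvfrintrp.s`-style instructions; marking `ISD::FCEIL`/`FFLOOR`/`FTRUNC`/`FROUNDEVEN` Legal for the LSX/LASX vector types is what lets the backend keep one whole-vector node instead of scalarizing. In scalar C++ terms, the "before" is an explicit per-lane loop like the sketch below, and the "after" is one vector operation:

```cpp
#include <cmath>
#include <cstddef>
#include <iostream>

// What the previously scalarized codegen did, one lane at a time.
static void ceil_per_lane(const float *in, float *out, size_t n) {
  for (size_t i = 0; i < n; ++i)
    out[i] = std::ceil(in[i]);
}

int main() {
  float in[8] = {0.1f, 1.5f, -2.3f, 3.9f, -0.5f, 7.2f, 8.8f, -9.1f};
  float out[8];
  ceil_per_lane(in, out, 8);
  for (float v : out)
    std::cout << v << " ";
  std::cout << "\n";
  return 0;
}
```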
diff --git a/llvm/test/CodeGen/SystemZ/stackmap.ll b/llvm/test/CodeGen/SystemZ/stackmap.ll index 05b8de756c032..f414ea33a6e80 100644 --- a/llvm/test/CodeGen/SystemZ/stackmap.ll +++ b/llvm/test/CodeGen/SystemZ/stackmap.ll @@ -84,14 +84,14 @@ ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long 65535 +; CHECK-NEXT: .long -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .short 8 ; CHECK-NEXT: .short 0 ; CHECK-NEXT: .short 0 -; CHECK-NEXT: .long 65535 +; CHECK-NEXT: .long -1 ; SmallConstant ; CHECK-NEXT: .byte 4 ; CHECK-NEXT: .byte 0 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s index fec8ba19f93fe..0a480a73cde5b 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vimage.s @@ -2,33 +2,33 @@ ; RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -show-encoding %s 2>&1 | FileCheck --check-prefix=GFX12-ERR --implicit-check-not=error: --strict-whitespace %s tensor_load_to_lds s[0:3], s[4:11] -// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS -// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] -// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV -// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11] -// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS -// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] // 
GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] -// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV -// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] +// GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt index 9afaa075ea838..800579391d8eb 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vimage.txt @@ -1,25 +1,25 @@ # RUN: llvm-mc -disassemble -triple=amdgcn -mcpu=gfx1250 -show-encoding %s | FileCheck --check-prefix=GFX1250 %s -# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_load_to_lds s[0:3], s[4:11] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_load_to_lds s[0:3], s[4:11] th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x00,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] -0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x00,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] -0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_load_to_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x00,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_store_from_lds s[0:3], s[4:11] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_store_from_lds 
s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c] -0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x00,0x00,0x04,0x7c,0x7c +# GFX1250: tensor_store_from_lds s[0:3], s[4:11] th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c] +0x01,0x40,0x71,0xd0,0x00,0x00,0x3c,0x7c,0x00,0x04,0x7c,0x7c -# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10] -0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x40,0x71,0xd0,0x00,0x00,0x00,0x7c,0x00,0x04,0x0c,0x10 -# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10] -0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x00,0x00,0x04,0x0c,0x10 +# GFX1250: tensor_store_from_lds s[0:3], s[4:11], s[12:15], s[16:19] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10] +0x01,0x40,0x71,0xd0,0x00,0x00,0x68,0x7c,0x00,0x04,0x0c,0x10 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll index cc2e16e2b099b..959b2350d9d78 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll @@ -6,14 +6,12 @@ define float @test(i8 %0) { ; CHECK-SAME: i8 [[TMP0:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> , i8 [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], -; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP9]] to i32 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] ; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [ ; CHECK-NEXT: i32 0, label %[[EXIT]] diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll index 4a539d77af3cb..1df655250f57e 100644 --- a/llvm/test/Transforms/SimplifyCFG/pr165301.ll +++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll @@ -1,11 +1,11 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6 ; RUN: opt -S -passes="simplifycfg" < %s | FileCheck %s ; Make sure there's no use after free when removing 
diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
index 4a539d77af3cb..1df655250f57e 100644
--- a/llvm/test/Transforms/SimplifyCFG/pr165301.ll
+++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll
@@ -1,11 +1,11 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 6
 ; RUN: opt -S -passes="simplifycfg" < %s | FileCheck %s
 ; Make sure there's no use after free when removing incoming values from PHI nodes
-define i32 @pr165301(i1 %cond) {
+define i32 @pr165301(i1 %cond) !prof !0 {
 ; CHECK-LABEL: define i32 @pr165301(
-; CHECK-SAME: i1 [[COND:%.*]]) {
+; CHECK-SAME: i1 [[COND:%.*]]) !prof [[PROF0:![0-9]+]] {
 ; CHECK-NEXT: [[ENTRY:.*:]]
 ; CHECK-NEXT: br label %[[SWITCHBB:.*]]
 ; CHECK: [[SWITCHBB]]:
@@ -18,9 +18,14 @@ switchbb:
 switch i1 %cond, label %default [
   i1 false, label %switchbb
   i1 true, label %switchbb
-  ]
+  ], !prof !1
 
 default:
   %phi.lcssa = phi i32 [ 0, %switchbb ]
   ret i32 %phi.lcssa
 }
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 2, i32 3, i32 5}
+;.
+; CHECK: [[PROF0]] = !{!"function_entry_count", i32 10}
+;.
diff --git a/llvm/utils/profcheck-xfail.txt b/llvm/utils/profcheck-xfail.txt
index 83bffc70574a8..380b162d8c58c 100644
--- a/llvm/utils/profcheck-xfail.txt
+++ b/llvm/utils/profcheck-xfail.txt
@@ -517,8 +517,11 @@ Instrumentation/TypeSanitizer/alloca-only.ll
 Instrumentation/TypeSanitizer/anon.ll
 Instrumentation/TypeSanitizer/basic.ll
 Instrumentation/TypeSanitizer/basic-nosan.ll
+Instrumentation/TypeSanitizer/basic_outlined.ll
+Instrumentation/TypeSanitizer/basic_verify_outlined.ll
 Instrumentation/TypeSanitizer/byval.ll
 Instrumentation/TypeSanitizer/globals.ll
+Instrumentation/TypeSanitizer/globals_outlined.ll
 Instrumentation/TypeSanitizer/invalid-metadata.ll
 Instrumentation/TypeSanitizer/memintrinsics.ll
 Instrumentation/TypeSanitizer/nosanitize.ll
@@ -729,6 +732,7 @@ Transforms/ExpandVariadics/expand-va-intrinsic-split-simple.ll
 Transforms/ExpandVariadics/intrinsics.ll
 Transforms/FixIrreducible/basic.ll
 Transforms/FixIrreducible/bug45623.ll
+Transforms/FixIrreducible/callbr.ll
 Transforms/FixIrreducible/nested.ll
 Transforms/FixIrreducible/switch.ll
 Transforms/GCOVProfiling/atomic-counter.ll
@@ -1106,6 +1110,7 @@ Transforms/LoopSimplifyCFG/update_parents.ll
 Transforms/LoopUnroll/peel-last-iteration-expansion-cost.ll
 Transforms/LoopUnroll/peel-last-iteration-with-guards.ll
 Transforms/LoopUnroll/peel-last-iteration-with-variable-trip-count.ll
+Transforms/LoopUnroll/runtime-loop-multiple-exits.ll
 Transforms/LoopVersioning/add-phi-update-users.ll
 Transforms/LoopVersioning/basic.ll
 Transforms/LoopVersioning/bound-check-partially-known.ll
diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index b39207fc30dd7..e00f3c1526005 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -323,8 +323,8 @@ def MemRef_ReallocOp : MemRef_Op<"realloc"> {
 ```mlir
 %new = memref.realloc %old : memref<64xf32> to memref<124xf32>
-  %4 = memref.load %new[%index] // ok
-  %5 = memref.load %old[%index] // undefined behavior
+  %4 = memref.load %new[%index] : memref<124xf32> // ok
+  %5 = memref.load %old[%index] : memref<64xf32> // undefined behavior
 ```
 }];
@@ -445,9 +445,10 @@ def MemRef_AllocaScopeOp : MemRef_Op<"alloca_scope",
 operation:
 
 ```mlir
-  %result = memref.alloca_scope {
+  %result = memref.alloca_scope -> f32 {
+    %value = arith.constant 1.0 : f32
 ...
-    memref.alloca_scope.return %value
+    memref.alloca_scope.return %value : f32
 }
 ```
@@ -478,7 +479,7 @@ def MemRef_AllocaScopeReturnOp : MemRef_Op<"alloca_scope.return",
 to indicate which values are going to be returned. For example:
 
 ```mlir
-  memref.alloca_scope.return %value
+  memref.alloca_scope.return %value : f32
 ```
 }];
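An aside on the two `alloca_scope` hunks just above: the corrected examples show that a scope yielding a value types the operand of `memref.alloca_scope.return`. A minimal, self-contained sketch of the pattern (function and value names are invented for illustration; this is not code from the patch):

```mlir
func.func @scope_result(%idx: index) -> f32 {
  %c1 = arith.constant 1.0 : f32
  // Stack allocations made inside the scope are freed when the region
  // exits; the loaded value is forwarded out via alloca_scope.return.
  %result = memref.alloca_scope -> f32 {
    %buf = memref.alloca() : memref<4xf32>
    memref.store %c1, %buf[%idx] : memref<4xf32>
    %v = memref.load %buf[%idx] : memref<4xf32>
    memref.alloca_scope.return %v : f32
  }
  return %result : f32
}
```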
@@ -543,11 +544,11 @@ def MemRef_CastOp : MemRef_Op<"cast", [
 Example:
 
 ```mlir
-  Cast to concrete shape.
-    %4 = memref.cast %1 : memref<*xf32> to memref<4x?xf32>
+  // Cast to concrete shape.
+  %4 = memref.cast %1 : memref<*xf32> to memref<4x?xf32>
 
-  Erase rank information.
-    %5 = memref.cast %1 : memref<4x?xf32> to memref<*xf32>
+  // Erase rank information.
+  %5 = memref.cast %1 : memref<4x?xf32> to memref<*xf32>
 ```
 }];
@@ -613,8 +614,8 @@ def MemRef_DeallocOp : MemRef_Op<"dealloc", [MemRefsNormalizable]> {
 Example:
 
 ```mlir
-  %0 = memref.alloc() : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1), 1>>
-  memref.dealloc %0 : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1), 1>>
+  %0 = memref.alloc() : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
+  memref.dealloc %0 : memref<8x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1>
 ```
 }];
@@ -728,13 +729,13 @@ def MemRef_DmaStartOp : MemRef_Op<"dma_start"> {
 space 1 at indices [%k, %l], would be specified as follows:
 
 ```mlir
-  %num_elements = arith.constant 256
+  %num_elements = arith.constant 256 : index
 %idx = arith.constant 0 : index
-  %tag = memref.alloc() : memref<1 x i32, affine_map<(d0) -> (d0)>, 4>
-  dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx] :
-    memref<40 x 128 x f32>, affine_map<(d0) -> (d0)>, 0>,
-    memref<2 x 1024 x f32>, affine_map<(d0) -> (d0)>, 1>,
-    memref<1 x i32>, affine_map<(d0) -> (d0)>, 2>
+  %tag = memref.alloc() : memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
+  memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx] :
+    memref<40 x 128 x f32, affine_map<(d0, d1) -> (d0, d1)>, 0>,
+    memref<2 x 1024 x f32, affine_map<(d0, d1) -> (d0, d1)>, 1>,
+    memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
 ```
 
 If %stride and %num_elt_per_stride are specified, the DMA is expected to
@@ -742,8 +743,8 @@
 memory space 0 until %num_elements are transferred.
 
 ```mlir
-  dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx], %stride,
-            %num_elt_per_stride :
+  memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%idx], %stride,
+                   %num_elt_per_stride :
 ```
 
 * TODO: add additional operands to allow source and destination striding, and
@@ -891,10 +892,10 @@ def MemRef_DmaWaitOp : MemRef_Op<"dma_wait"> {
 Example:
 
 ```mlir
-  dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%index] :
-    memref<2048 x f32>, affine_map<(d0) -> (d0)>, 0>,
-    memref<256 x f32>, affine_map<(d0) -> (d0)>, 1>
-    memref<1 x i32>, affine_map<(d0) -> (d0)>, 2>
+  memref.dma_start %src[%i, %j], %dst[%k, %l], %num_elements, %tag[%index] :
+    memref<2048 x f32, affine_map<(d0) -> (d0)>, 0>,
+    memref<256 x f32, affine_map<(d0) -> (d0)>, 1>,
+    memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
 ...
 ...
 dma_wait %tag[%index], %num_elements : memref<1 x i32, affine_map<(d0) -> (d0)>, 2>
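Some context for the `dma_start`/`dma_wait` fixes above: the two ops work as a pair, with a small tag memref carrying the completion state of the transfer. A hedged, self-contained sketch of a complete transfer (shapes, memory spaces, and names are illustrative assumptions, not taken from the patch):

```mlir
func.func @dma_copy(%src: memref<256xf32>, %dst: memref<256xf32, 1>) {
  %c0 = arith.constant 0 : index
  %num = arith.constant 256 : index
  // One-element tag buffer; the wait below blocks until the transfer
  // associated with this tag has moved %num elements.
  %tag = memref.alloca() : memref<1xi32>
  memref.dma_start %src[%c0], %dst[%c0], %num, %tag[%c0]
      : memref<256xf32>, memref<256xf32, 1>, memref<1xi32>
  memref.dma_wait %tag[%c0], %num : memref<1xi32>
  return
}
```

Note that source and destination deliberately live in different memory spaces, which is what the DMA pair is for.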
- memref.global "private" @x : memref<2xf32> = dense<0.0,2.0> + memref.global "private" @x : memref<2xf32> = dense<[0.0, 2.0]> // Private variable with an initial value and an alignment (power of 2). - memref.global "private" @x : memref<2xf32> = dense<0.0,2.0> {alignment = 64} + memref.global "private" @x : memref<2xf32> = dense<[0.0, 2.0]> {alignment = 64} // Declaration of an external variable. memref.global "private" @y : memref<4xi32> @@ -1194,7 +1195,7 @@ def MemRef_GlobalOp : MemRef_Op<"global", [Symbol]> { memref.global @z : memref<3xf16> = uninitialized // Externally visible constant variable. - memref.global constant @c : memref<2xi32> = dense<1, 4> + memref.global constant @c : memref<2xi32> = dense<[1, 4]> ``` }]; @@ -1555,7 +1556,8 @@ def MemRef_ReinterpretCastOp %dst = memref.reinterpret_cast %src to offset: [%offset], sizes: [%sizes], - strides: [%strides] + strides: [%strides] : + memref<*xf32> to memref> ``` means that `%dst`'s descriptor will be: ```mlir @@ -1695,12 +1697,12 @@ def MemRef_ReshapeOp: MemRef_Op<"reshape", [ ```mlir // Reshape statically-shaped memref. %dst = memref.reshape %src(%shape) - : (memref<4x1xf32>, memref<1xi32>) to memref<4xf32> + : (memref<4x1xf32>, memref<1xi32>) -> memref<4xf32> %dst0 = memref.reshape %src(%shape0) - : (memref<4x1xf32>, memref<2xi32>) to memref<2x2xf32> + : (memref<4x1xf32>, memref<2xi32>) -> memref<2x2xf32> // Flatten unranked memref. %dst = memref.reshape %src(%shape) - : (memref<*xf32>, memref<1xi32>) to memref + : (memref<*xf32>, memref<1xi32>) -> memref ``` b. Source type is ranked or unranked. Shape argument has dynamic size. @@ -1709,10 +1711,10 @@ def MemRef_ReshapeOp: MemRef_Op<"reshape", [ ```mlir // Reshape dynamically-shaped 1D memref. %dst = memref.reshape %src(%shape) - : (memref, memref) to memref<*xf32> + : (memref, memref) -> memref<*xf32> // Reshape unranked memref. 
@@ -1555,7 +1556,8 @@ def MemRef_ReinterpretCastOp
 %dst = memref.reinterpret_cast %src to
 offset: [%offset],
 sizes: [%sizes],
-  strides: [%strides]
+  strides: [%strides] :
+  memref<*xf32> to memref<?xf32, strided<[?], offset: ?>>
 ```
 means that `%dst`'s descriptor will be:
 ```mlir
@@ -1695,12 +1697,12 @@ def MemRef_ReshapeOp: MemRef_Op<"reshape", [
 ```mlir
 // Reshape statically-shaped memref.
 %dst = memref.reshape %src(%shape)
-         : (memref<4x1xf32>, memref<1xi32>) to memref<4xf32>
+         : (memref<4x1xf32>, memref<1xi32>) -> memref<4xf32>
 %dst0 = memref.reshape %src(%shape0)
-         : (memref<4x1xf32>, memref<2xi32>) to memref<2x2xf32>
+         : (memref<4x1xf32>, memref<2xi32>) -> memref<2x2xf32>
 
 // Flatten unranked memref.
 %dst = memref.reshape %src(%shape)
-         : (memref<*xf32>, memref<1xi32>) to memref<?xf32>
+         : (memref<*xf32>, memref<1xi32>) -> memref<?xf32>
 ```
 
 b. Source type is ranked or unranked. Shape argument has dynamic size.
@@ -1709,10 +1711,10 @@
 ```mlir
 // Reshape dynamically-shaped 1D memref.
 %dst = memref.reshape %src(%shape)
-         : (memref<?xf32>, memref<?xindex>) to memref<*xf32>
+         : (memref<?xf32>, memref<?xindex>) -> memref<*xf32>
 
 // Reshape unranked memref.
 %dst = memref.reshape %src(%shape)
-         : (memref<*xf32>, memref<?xindex>) to memref<*xf32>
+         : (memref<*xf32>, memref<?xindex>) -> memref<*xf32>
 ```
 }];
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
index 6b4c185d7d897..83406c8c75dcf 100644
--- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
+++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp
@@ -11,7 +11,6 @@
 #include "mlir/Dialect/Index/IR/IndexOps.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/XeGPU/IR/XeGPU.h"
-#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
 #include "mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
diff --git a/mlir/test/mlir-runner/memref-reshape.mlir b/mlir/test/mlir-runner/memref-reshape.mlir
index 8c17f1fd02358..b264e0285953f 100644
--- a/mlir/test/mlir-runner/memref-reshape.mlir
+++ b/mlir/test/mlir-runner/memref-reshape.mlir
@@ -65,8 +65,8 @@ func.func @reshape_ranked_memref_to_ranked(%input : memref<2x3xf32>,
 func.func @reshape_unranked_memref_to_ranked(%input : memref<2x3xf32>,
                                              %shape : memref<2xindex>) {
 %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
-  %output = memref.reshape %input(%shape)
-                : (memref<2x3xf32>, memref<2xindex>) -> memref<?x?xf32>
+  %output = memref.reshape %unranked_input(%shape)
+                : (memref<*xf32>, memref<2xindex>) -> memref<?x?xf32>
 
 %unranked_output = memref.cast %output : memref<?x?xf32> to memref<*xf32>
 call @printMemrefF32(%unranked_output) : (memref<*xf32>) -> ()
@@ -95,8 +95,8 @@ func.func @reshape_unranked_memref_to_unranked(%input : memref<2x3xf32>,
                                                %shape : memref<2xindex>) {
 %unranked_input = memref.cast %input : memref<2x3xf32> to memref<*xf32>
 %dyn_size_shape = memref.cast %shape : memref<2xindex> to memref<?xindex>
-  %output = memref.reshape %input(%dyn_size_shape)
-                : (memref<2x3xf32>, memref<?xindex>) -> memref<*xf32>
+  %output = memref.reshape %unranked_input(%dyn_size_shape)
+                : (memref<*xf32>, memref<?xindex>) -> memref<*xf32>
 call @printMemrefF32(%output) : (memref<*xf32>) -> ()
 // CHECK: rank = 2 offset = 0 sizes = [3, 2] strides = [2, 1] data =
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 1385e1a802d5b..83414ceed5ca5 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -3729,7 +3729,6 @@ cc_library(
         ":XeGPUAttrInterfaceIncGen",
         ":XeGPUEnumsIncGen",
         ":XeGPUIncGen",
-        ":XeGPUUtils",
         ":XeGPUuArch",
         ":XeVMDialect",
         "//llvm:Support",
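A closing aside on the `memref.reshape` corrections (both the documentation hunks and the mlir-runner test above): the shape operand is itself a memref of index values, so a reshape is typically preceded by stores that describe the target extents. A hedged sketch of that pattern (names and shapes are invented; this is not code from the patch):

```mlir
func.func @flatten(%input : memref<2x3xf32>) -> memref<?xf32> {
  // Describe the target shape [6] in a rank-1 index memref.
  %shape = memref.alloca() : memref<1xindex>
  %c0 = arith.constant 0 : index
  %c6 = arith.constant 6 : index
  memref.store %c6, %shape[%c0] : memref<1xindex>
  // A statically-shaped shape operand yields a ranked (here 1-D dynamic)
  // result, matching case a. in the op documentation.
  %flat = memref.reshape %input(%shape)
      : (memref<2x3xf32>, memref<1xindex>) -> memref<?xf32>
  return %flat : memref<?xf32>
}
```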